/**********************************************************************
DO FILE SUMMARY

Rural-Urban Differences in Diabetes Care and Control in 40 Low- and Middle-Income
Countries: A Cross-Sectional Study of Nationally Representative, Individual-Level Data

Sensitivity analyses

David Flood, for the HPACC collaborators
University of Michigan

September 19, 2021
**********************************************************************/

////////////////////////////////////////////////////////////////////////////////
////////////////// SENSITIVITY ANALYSIS 1: UNADJUSTED PROPORTIONS //////////////
////////////////////////////////////////////////////////////////////////////////

/*
This is calculated as part of the weq pooled code. It only requires making the
bar graph and appendix table with underlying data
*/

/*******************************************************************************
WEQ PROPORTION PLOTS
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------


* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "weq prop plot ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "name"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose- lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"
	
putexcel B1 = "est_pred_urban"
putexcel C1 = "lci_pred_urban"
putexcel D1 = "uci_pred_urban"
putexcel E1 = "est_pred_rural"
putexcel F1 = "lci_pred_rural"
putexcel G1 = "uci_pred_rural"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars {	
	
	local n = `n' + 1
	
	matlist weq_prop_`outcome'
	matrix results = weq_prop_`outcome'

	putexcel B`n' = results[1,3]*100
	putexcel C`n' = results[5,3]*100
	putexcel D`n' = results[6,3]*100
	putexcel E`n' = results[1,4]*100
	putexcel F`n' = results[5,4]*100
	putexcel G`n' = results[6,4]*100

}

/*******************************************************************************
WEQ PROPORTIONS MARGINS APPENDIX TABLE
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq regression output.
	*/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "Appendix - weq prop table ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "Goal"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose- lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"

putexcel B1 = "Urban"
putexcel C1 = "Rural"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars {	
	
	local n = `n' + 1
	
	matlist weq_prop_`outcome'
	matrix results = weq_prop_`outcome'
	
	* Urban
	local b = string(results[1,3]*100,"%9.1f")
	local lci = string(results[5,3]*100,"%9.1f")
	local uci = string(results[6,3]*100,"%9.1f")
	
	putexcel B`n' = "`b' (`lci'-`uci')"

	* Rural
	local b = string(results[1,4]*100,"%9.1f")
	local lci = string(results[5,4]*100,"%9.1f")
	local uci = string(results[6,4]*100,"%9.1f")
	
	putexcel C`n' = "`b' (`lci'-`uci')"
}


////////////////////////////////////////////////////////////////////////////////
////////////////// SENSITIVITY ANALYSIS 2: INCLUSION OF EDUCATION //////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
weq_edu REGRESSIONS
*******************************************************************************/

foreach outcome of varlist $dependent_vars {			

		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != . & rural != . & sex != . & educat3 != .

		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen wpopnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Make age spline
			mkspline age_spline = age if sample == 1, cubic nknots(5) displayknots
		
		* Set svyset
		svyset psu_num[pw = weqnr_sample], strata(stratum_num) singleunit(centered) // note weights are not rescaled
		
		* Raw proportions
		svy, subpop(sample): proportion `outcome', over(rural)
		matlist r(table)
		matrix weq_edu_prop_`outcome' = r(table)
		
		* Regression
		svy, subpop(sample): poisson `outcome' i.rural i.educat3 c.age_spline* i.country_encoded, irr baselevels
		matlist r(table)
		matrix weq_edu_rr_`outcome' = r(table)
		
		* Predictive margins
		margins rural, vce(unconditional)
		matlist r(table)
		matrix weq_edu_pred_`outcome' = r(table)
										
		* dydx margins
		margins, dydx(rural) vce(unconditional)
		matlist r(table)
		matrix weq_edu_dydx_`outcome' = r(table)
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample age_spline*
		}	

/*******************************************************************************
weq_edu RR FOREST PLOTS
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq regression output.
	*/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "weq_edu rr forestplot ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "name"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose-lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"
	
putexcel B1 = "est_rr"
putexcel C1 = "lci_rr"
putexcel D1 = "uci_rr"
putexcel E1 = "est_dydx"
putexcel F1 = "lci_dydx"
putexcel G1 = "uci_dydx"
putexcel H1 = "p_value"
putexcel I1 = "est_urban_pred"
putexcel J1 = "lci_urban_pred"
putexcel K1 = "uci_urban_pred"
putexcel L1 = "est_rural_pred"
putexcel M1 = "lci_rural_pred"
putexcel N1 = "uci_rural_pred"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars {	
	
	local n = `n' + 1
	
	matlist weq_edu_rr_`outcome'
	matrix results = weq_edu_rr_`outcome'

	local b = results[1,2]
	local lci = results[5,2]
	local uci = results[6,2]
	local p = results[4,2]

	putexcel B`n' = `b'
	putexcel C`n' = `lci'
	putexcel D`n' = `uci'
	putexcel H`n' = `p'
	
	matlist weq_edu_dydx_`outcome'
	matrix results = weq_edu_dydx_`outcome'

	local b = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100

	putexcel E`n' = `b'
	putexcel F`n' = `lci'
	putexcel G`n' = `uci'
	
	matlist weq_edu_pred_`outcome'
	matrix results = weq_edu_pred_`outcome'
	
	putexcel I`n' = results[1,1]*100
	putexcel J`n' = results[5,1]*100
	putexcel K`n' = results[6,1]*100
	putexcel L`n' = results[1,2]*100
	putexcel M`n' = results[5,2]*100
	putexcel N`n' = results[6,2]*100

}

* 2. Generating weq rr forestplots ---------------------------------------------

frame create weq_edu_forestplot
frame change weq_edu_forestplot
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables/weq_edu rr forestplot $date.xlsx", sheet("Sheet1") firstrow cellrange(A1:N11) clear

insobs 1, before(1)
insobs 2, after(3)
insobs 2, after(8)

gen id = _n

replace name = "{bf:Diagnosis}" if id == 1
replace name = " " if id == 4
replace name = "{bf:Treatment}" if id == 5
replace name = " " if id == 9
replace name = "{bf:Control}" if id == 10

* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(id,1,5,10)
replace _USE = 6 if missing(name)

* Transforming for eform display in forestplot
gen est_rr_exp = log(est_rr)
gen lci_rr_exp = log(lci_rr)
gen uci_rr_exp = log(uci_rr)

* Making string variables	
gen pvalue_s = string(p_value,"%4.3f")
replace pvalue_s = "<0.001" if p_value <0.001
replace pvalue_s = "" if p_value == .

gen est_rr_s = string(est_rr,"%9.2f")
gen lci_rr_s = string(lci_rr,"%9.2f")
gen uci_rr_s = string(uci_rr,"%9.2f")
gen output_rr_s = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace output_rr_s = "" if est_rr == .

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")
gen output_dydx_s = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace output_dydx_s = "" if est_dydx == .

gen est_urban_pred_s = string(est_urban_pred,"%9.1f")
gen lci_urban_pred_s = string(lci_urban_pred,"%9.1f")
gen uci_urban_pred_s = string(uci_urban_pred,"%9.1f")
gen output_urban_pred_s = est_urban_pred_s + " (" + lci_urban_pred_s + " to " + uci_urban_pred_s + ")"
replace output_urban_pred_s = "" if est_urban_pred == .

gen est_rural_pred_s = string(est_rural_pred,"%9.1f")
gen lci_rural_pred_s = string(lci_rural_pred,"%9.1f")
gen uci_rural_pred_s = string(uci_rural_pred,"%9.1f")
gen output_rural_pred_s = est_rural_pred_s + " (" + lci_rural_pred_s + " to " + uci_rural_pred_s + ")"
replace output_rural_pred_s = "" if est_rural_pred == .

* Realiigning
leftalign

* Adding bolded labels		
label var name `"`"{bf:Outcome}"'"'
label var output_rr_s `"`"{bf:Risk ratio}"'"'
label var output_dydx_s `"`"{bf:Average marginal}"' `"{bf:effect (%)}"'"'
label var pvalue_s `"`"{bf:P value}"'"'
label var output_urban_pred_s `"`"{bf:Urban adjusted}"' `"{bf:proportion (%)}"'"'
label var output_rural_pred_s `"`"{bf:Rural adjusted}"' `"{bf:proportion (%)}"'"'

format %-6s pvalue_s

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

local line_size ".2rs"
local forest_font_size "2.1"
local arrow_font_size "2"	

forestplot est_rr_exp lci_rr_exp uci_rr_exp, ///
	labels(name) ///
	eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_rr_s output_dydx_s pvalue_s output_urban_pred_s output_rural_pred_s) /// this tells forestplot which columsn to display
	range(0.25 1.8) /// range of the plot
	xlabel(0.25 0.5 1 2, labsize(2)) /// plot labels xmtick(0.3(0.1)1.5) ///
	astext(85) ///
	spacing(1.5) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		nlineopts(lwidth(`line_size')) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.25rs)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) /// 	
	ysize(3) ///
	xtitle("Risk ratio", size(`forest_font_size')  margin(l=0 r=60 b=0 t=1)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	name(weq_edu_forest_plot,replace) //

* Fixing formatting issues	----------------------------------------------------

* Size of text in forestplot
	gr_edit plotregion1.plot5.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot6.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot7.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot8.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot9.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot10.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	
* Top black line
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

* Arrows and labels for "favors"
gr_edit .AddLine added_lines editor 40 17 50 17
		gr_edit .added_lines_new = 1
		gr_edit .added_lines_rec = 1
		gr_edit .added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[1].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[1].style.editstyle headpos(head) editcopy

		gr_edit .AddLine added_lines editor 34 17 24 17
		gr_edit .added_lines_new = 2
		gr_edit .added_lines_rec = 2
		gr_edit .added_lines[2].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[2].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[2].style.editstyle headpos(head) editcopy

		gr_edit .AddTextBox added_text editor 15 39
		gr_edit .added_text_new = 1
		gr_edit .added_text_rec = 1
		gr_edit .added_text[1].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(right) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[1].style.editstyle size(`arrow_font_size') editcopy
		gr_edit .added_text[1].text = {}
		gr_edit .added_text[1].text.Arrpush `"{it:Greater in rural}"'

		gr_edit .AddTextBox added_text editor 15 22
		gr_edit .added_text_new = 2
		gr_edit .added_text_rec = 2
		gr_edit .added_text[2].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[2].style.editstyle size(`arrow_font_size`'') editcopy
		gr_edit .added_text[2].text = {}
		gr_edit .added_text[2].text.Arrpush `"{it:Lower in rural}"'

	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
	graph save "weq_edu_forest_plot_${date}.gph", replace         
	graph export "weq_edu_forest_plot_${date}.pdf", as(pdf) name(weq_edu_forest_plot) replace
		
frame change default
frame drop weq_edu_forestplot

		

////////////////////////////////////////////////////////////////////////////////
////////////////// SENSITIVITY ANALYSIS 3: A1c 7.0 /////////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
GENERATING OUTCOMES
*******************************************************************************/	

* Cascade of care --------------------------------------------------------------

	* Cascade: Tested
	gen a1c_dm_test = .
	replace a1c_dm_test = 0 if bg_ms_new == 0 & clin_dia == 1
	replace a1c_dm_test = 1 if bg_ms_new == 1 & clin_dia == 1
	
	* Cascade: Diagnosed
	gen a1c_dm_diag = .
	replace a1c_dm_diag = 0 if hbg_new == 0 & clin_dia == 1
	replace a1c_dm_diag = 1 if hbg_new == 1 & clin_dia == 1
	
* Treatment --------------------------------------------------------------------
			
	* Treatment: Glucose-lowering med, among people with A1c >= 7
	gen a1c_gluc_low_subpop = .
	replace a1c_gluc_low_subpop = 1 if clin_dia == 1 & ///
		((insulin == 1 | dia_med == 1) | /// using agent
		(fbg_new >= 8.0 & fast_new == 1) | (hba1c_p >= 7 & hba1c_p <30)) // uncontrolled fbp, fasting not assumed
					
	gen a1c_gluc_low = .
	replace a1c_gluc_low = 0 if dia_med_new == 0 & a1c_gluc_low_subpop == 1
	replace a1c_gluc_low = 1 if dia_med_new == 1 & a1c_gluc_low_subpop == 1
	* replace a1c_gluc_low = . if inlist(hbg_new,.,0)

	* Treatment: Anti-hypertension med, among people with hypertension
	gen a1c_bp_low_subpop = 0
	replace a1c_bp_low_subpop = 1 if clin_dia == 1 & clin_hypt == 1
	
	gen a1c_bp_low = .
	replace a1c_bp_low = 0 if hypt_med_new == 0 & clin_dia == 1 & clin_hypt == 1
	replace a1c_bp_low = 1 if hypt_med_new == 1 & clin_dia == 1 & clin_hypt == 1
	replace a1c_bp_low = . if inlist(country,"Romania","Fiji") // no hypt_med so unable to calculate
	* replace a1c_bp_low = . if inlist(hbg_new,.,0)

	* Treatment: Cholesterol-lowering med, among people 40+ years
	gen a1c_lipid_low = .
	replace a1c_lipid_low = 0 if statin == 0 & inrange(age,40,110) & clin_dia == 1 // age restrictions applied elsewhere
	replace a1c_lipid_low = 1 if statin == 1 & inrange(age,40,110) & clin_dia == 1 // age restrictions applied elsewhere
	* replace a1c_lipid_low = . if inlist(hbg_new,.,0)
	
* Control ----------------------------------------------------------------------	
	
	* Cascade: Glycemic control
	gen a1c_dm_control = .
	replace a1c_dm_control = 0 if clin_dia == 1 & hbg_new == 1 
	replace a1c_dm_control = 1 if clin_dia == 1 & hbg_new == 1 & ///
		((fbg_new < 8.0 & fast_new == 1) | (hba1c_p <7.0)) // uncontrolled fbp, fasting not assumed
		
	* Cascade: Blood pressure control
	gen a1c_hy_control = .
	replace a1c_hy_control = 0 if (inrange(sbp_avg,140,300) | inrange(dbp_avg,90,170)) & clin_dia == 1 & hbg_new == 1
	replace a1c_hy_control = 1 if (sbp_avg <140 & dbp_avg<90) & clin_dia == 1 & hbg_new == 1
		
	* Cascade: Cholesterol control
	gen a1c_lipid_control = .
	replace a1c_lipid_control = 0 if ((age <40 & tchol != . & statin != .) | (inrange(age,40,110) & statin != .)) & clin_dia == 1 & hbg_new == 1
	replace a1c_lipid_control = 1 if ((age <40 & tchol <190 & statin != .) | (inrange(age,40,110) & statin == 1)) & clin_dia == 1 & hbg_new == 1

	* Cascade: Not smoking
	gen a1c_not_csmoke = .
	replace a1c_not_csmoke = 1 if csmoke == 0 & clin_dia == 1 & hbg_new == 1
	replace a1c_not_csmoke = 0 if csmoke == 1 & clin_dia == 1 & hbg_new == 1

	* Cascade: Triple control
	gen dm_a1c_triple = .
	replace dm_a1c_triple = 0 if a1c_dm_control != . & a1c_hy_control != . & a1c_lipid_control != .
	replace dm_a1c_triple = 1 if a1c_dm_control == 1 & a1c_hy_control == 1 & a1c_lipid_control == 1
	
	* Cascade: Double control
	gen dm_a1c_double = .
	replace dm_a1c_double = 0 if a1c_dm_control != . & a1c_hy_control != .
	replace dm_a1c_double = 1 if a1c_dm_control == 1 & a1c_hy_control == 1
	
	* Cascade: Quad control = triple control + smoking
	gen dm_a1c_quad = .
	replace dm_a1c_quad = 0 if a1c_dm_control != . & a1c_hy_control != . & a1c_lipid_control != . & a1c_not_csmoke != .
	replace dm_a1c_quad = 1 if a1c_dm_control == 1 & a1c_hy_control == 1 & a1c_lipid_control == 1 & a1c_not_csmoke == 1


/*******************************************************************************
weq_a1c REGRESSIONS
*******************************************************************************/

global dependent_vars_a1c "a1c_dm_test a1c_dm_diag a1c_gluc_low a1c_bp_low a1c_lipid_low a1c_dm_control a1c_hy_control a1c_lipid_control dm_a1c_double dm_a1c_triple"	

foreach outcome of varlist $dependent_vars_a1c {			

		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != . & rural != . & sex != .

		* Rescaling weights
			* bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			* gen wpopnr_sample=w_fbg*Population2015/sum_wpop_sample

			bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Make age spline
			mkspline age_spline = age if sample == 1, cubic nknots(5) displayknots
		
		* Set svyset
		svyset psu_num[pw = weqnr_sample], strata(stratum_num) singleunit(centered) // note weights are not rescaled
		
		* Raw proportions
		svy, subpop(sample): proportion `outcome', over(rural)
		matlist r(table)
		matrix weq_a1c_prop_`outcome' = r(table)
		
		* Regression
		svy, subpop(sample): poisson `outcome' i.rural c.age_spline* i.country_encoded, irr baselevels
		matlist r(table)
		matrix weq_a1c_rr_`outcome' = r(table)
		
		* Predictive margins
		margins rural, vce(unconditional)
		matlist r(table)
		matrix weq_a1c_pred_`outcome' = r(table)
										
		* dydx margins
		margins, dydx(rural) vce(unconditional)
		matlist r(table)
		matrix weq_a1c_dydx_`outcome' = r(table)
		
		* Drop variables for loop
		drop sample weqnr_sample sum_weq_sample age_spline*
		}	

/*******************************************************************************
weq_a1c RR FOREST PLOTS
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate weq_a1c regression output.
	*/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "weq_a1c rr forestplot ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "name"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose-lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"
	
putexcel B1 = "est_rr"
putexcel C1 = "lci_rr"
putexcel D1 = "uci_rr"
putexcel E1 = "est_dydx"
putexcel F1 = "lci_dydx"
putexcel G1 = "uci_dydx"
putexcel H1 = "p_value"
putexcel I1 = "est_urban_pred"
putexcel J1 = "lci_urban_pred"
putexcel K1 = "uci_urban_pred"
putexcel L1 = "est_rural_pred"
putexcel M1 = "lci_rural_pred"
putexcel N1 = "uci_rural_pred"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars_a1c {	
	
	local n = `n' + 1
	
	matlist weq_a1c_rr_`outcome'
	matrix results = weq_a1c_rr_`outcome'

	local b = results[1,2]
	local lci = results[5,2]
	local uci = results[6,2]
	local p = results[4,2]

	putexcel B`n' = `b'
	putexcel C`n' = `lci'
	putexcel D`n' = `uci'
	putexcel H`n' = `p'
	
	matlist weq_a1c_dydx_`outcome'
	matrix results = weq_a1c_dydx_`outcome'

	local b = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100

	putexcel E`n' = `b'
	putexcel F`n' = `lci'
	putexcel G`n' = `uci'		

	matlist weq_a1c_pred_`outcome'
	matrix results = weq_a1c_pred_`outcome'
	
	putexcel I`n' = results[1,1]*100
	putexcel J`n' = results[5,1]*100
	putexcel K`n' = results[6,1]*100
	putexcel L`n' = results[1,2]*100
	putexcel M`n' = results[5,2]*100
	putexcel N`n' = results[6,2]*100
	
}

* 2. Generating weq_a1c rr forestplots ---------------------------------------------

frame create weq_a1c_forestplot
frame change weq_a1c_forestplot
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables/weq_a1c rr forestplot $date.xlsx", sheet("Sheet1") firstrow cellrange(A1:N11) clear

insobs 1, before(1)
insobs 2, after(3)
insobs 2, after(8)

gen id = _n

replace name = "{bf:Diagnosis}" if id == 1
replace name = " " if id == 4
replace name = "{bf:Treatment}" if id == 5
replace name = " " if id == 9
replace name = "{bf:Control}" if id == 10

* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(id,1,5,10)
replace _USE = 6 if missing(name)

* Transforming for eform display in forestplot
gen est_rr_exp = log(est_rr)
gen lci_rr_exp = log(lci_rr)
gen uci_rr_exp = log(uci_rr)

* Making string variables	
gen pvalue_s = string(p_value,"%4.3f")
replace pvalue_s = "<0.001" if p_value <0.001
replace pvalue_s = "" if p_value == .

gen est_rr_s = string(est_rr,"%9.2f")
gen lci_rr_s = string(lci_rr,"%9.2f")
gen uci_rr_s = string(uci_rr,"%9.2f")
gen output_rr_s = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace output_rr_s = "" if est_rr == .

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")
gen output_dydx_s = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace output_dydx_s = "" if est_dydx == .

gen est_urban_pred_s = string(est_urban_pred,"%9.1f")
gen lci_urban_pred_s = string(lci_urban_pred,"%9.1f")
gen uci_urban_pred_s = string(uci_urban_pred,"%9.1f")
gen output_urban_pred_s = est_urban_pred_s + " (" + lci_urban_pred_s + " to " + uci_urban_pred_s + ")"
replace output_urban_pred_s = "" if est_urban_pred == .

gen est_rural_pred_s = string(est_rural_pred,"%9.1f")
gen lci_rural_pred_s = string(lci_rural_pred,"%9.1f")
gen uci_rural_pred_s = string(uci_rural_pred,"%9.1f")
gen output_rural_pred_s = est_rural_pred_s + " (" + lci_rural_pred_s + " to " + uci_rural_pred_s + ")"
replace output_rural_pred_s = "" if est_rural_pred == .

* Realiigning
leftalign

* Adding bolded labels		
label var name `"`"{bf:Outcome}"'"'
label var output_rr_s `"`"{bf:Risk ratio}"'"'
label var output_dydx_s `"`"{bf:Average marginal}"' `"{bf:effect (%)}"'"'
label var pvalue_s `"`"{bf:P value}"'"'
label var output_urban_pred_s `"`"{bf:Urban adjusted}"' `"{bf:proportion (%)}"'"'
label var output_rural_pred_s `"`"{bf:Rural adjusted}"' `"{bf:proportion (%)}"'"'

format %-6s pvalue_s

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

local line_size ".2rs"
local forest_font_size "2.1"
local arrow_font_size "2"	

forestplot est_rr_exp lci_rr_exp uci_rr_exp, ///
	labels(name) ///
	eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_rr_s output_dydx_s pvalue_s output_urban_pred_s output_rural_pred_s) /// this tells forestplot which columsn to display
	range(0.25 1.8) /// range of the plot
	xlabel(0.25 0.5 1 2, labsize(2)) /// plot labels xmtick(0.3(0.1)1.5) ///
	astext(85) ///
	spacing(1.5) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		nlineopts(lwidth(`line_size')) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.25rs)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) /// 	
	ysize(3) ///
	xtitle("Risk ratio", size(`forest_font_size')  margin(l=0 r=60 b=0 t=1)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	name(weq_a1c_forest_plot,replace) //

* Fixing formatting issues	----------------------------------------------------

* Size of text in forestplot
	gr_edit plotregion1.plot5.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot6.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot7.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot8.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot9.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot10.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	
* Top black line
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

* Arrows and labels for "favors"
gr_edit .AddLine added_lines editor 40 17 50 17
		gr_edit .added_lines_new = 1
		gr_edit .added_lines_rec = 1
		gr_edit .added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[1].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[1].style.editstyle headpos(head) editcopy

		gr_edit .AddLine added_lines editor 34 17 24 17
		gr_edit .added_lines_new = 2
		gr_edit .added_lines_rec = 2
		gr_edit .added_lines[2].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[2].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[2].style.editstyle headpos(head) editcopy

		gr_edit .AddTextBox added_text editor 15 39
		gr_edit .added_text_new = 1
		gr_edit .added_text_rec = 1
		gr_edit .added_text[1].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(right) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[1].style.editstyle size(`arrow_font_size') editcopy
		gr_edit .added_text[1].text = {}
		gr_edit .added_text[1].text.Arrpush `"{it:Greater in rural}"'

		gr_edit .AddTextBox added_text editor 15 22
		gr_edit .added_text_new = 2
		gr_edit .added_text_rec = 2
		gr_edit .added_text[2].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[2].style.editstyle size(`arrow_font_size`'') editcopy
		gr_edit .added_text[2].text = {}
		gr_edit .added_text[2].text.Arrpush `"{it:Lower in rural}"'

	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
	graph save "weq_a1c_forest_plot_${date}.gph", replace         
	graph export "weq_a1c_forest_plot_${date}.pdf", as(pdf) name(weq_a1c_forest_plot) replace
		
frame change default
frame drop weq_a1c_forestplot
		
////////////////////////////////////////////////////////////////////////////////
////////////////// SENSITIVITY ANALYSIS 4: POPULATION WEIGHTS //////////////////
////////////////////////////////////////////////////////////////////////////////


* IMPORTING IN 2015 POPULATION FROM 18-69 YEARS OF AGE BY COUNTRY --------------


frame create population
frame change population

	/* 		
	
	This file imports a CSV document with the GBD population estimates for 2015 by country and year
	
	http://ghdx.healthdata.org/record/ihme-data/gbd-2019-population-estimates-1950-2019
	
	There is no data manipulation done to the CSV file downloaded from IHME prior
	to its import here.
	
	*/

import delimited "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/External country data/Population/IHME_GBD_2019_POP_SYA_2015_Y2021M01D28", clear 

rename (location_name) (country)

keep country year age_group_id sex_name val sex_name location_id

drop if sex_name != "both"

drop if location_id == 533
drop if country =="South Asia"
drop if country =="North Africa and Middle East"
drop sex_name sex_name

reshape wide val, i(country) j(age_group_id)

* Values 66-117 represent yearly age categories of 18-69
keep country val66-val117

* Generates the row total of the relevant age subgroups
egen population_2015_18_69 = rowtotal(val66-val117)

keep country population_2015_18_69

* Renaming some countries
replace country = "Laos" if country == "Lao People's Democratic Republic"
replace country = "Iran" if country == "Iran (Islamic Republic of)"
replace country = "Moldova" if country == "Republic of Moldova"
replace country = "St. Vincent & the Grenadines" if country == "Saint Vincent and the Grenadines"
replace country = "Tanzania" if country == "United Republic of Tanzania"
replace country = "Timor Leste" if country == "Timor-Leste"
replace country = "Vietnam" if country == "Viet Nam"

save "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Temporary datasets/GBD 2015 population.dta", replace

frame change default
frame drop population

* Performing the merge
merge m:1 country using "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Temporary datasets/GBD 2015 population.dta"

* Checking merged data
list country Population2015 population_2015_18_69 if country_id == 1

drop if _merge == 2
drop _merge

* Regressions ------------------------------------------------------------------

foreach outcome of varlist clin_dia $dependent_vars {			

		* Generate sample
		gen sample = 1 if analytic_sample == 1 & `outcome' != . & rural != . & sex != .

		* Rescaling weights
			bys country: egen sum_wpop_sample=sum(w_fbg) if sample==1   // pop weights not needed
			gen wpopnr_sample=w_fbg*population_2015_18_69/sum_wpop_sample

			* bys country: egen sum_weq_sample=sum(w_fbg) if sample==1		
			* gen weqnr_sample=w_fbg/sum_weq_sample
		
		* Make age spline
			mkspline age_spline = age if sample == 1, cubic nknots(5) displayknots
		
		* Set svyset
		svyset psu_num[pw = wpopnr_sample], strata(stratum_num) singleunit(centered) // note weights are not rescaled
		
		* Raw proportions
		svy, subpop(sample): proportion `outcome', over(rural)
		matlist r(table)
		matrix wpop_prop_`outcome' = r(table)
		
		* Regression
		svy, subpop(sample): poisson `outcome' i.rural c.age_spline* i.country_encoded, irr baselevels
		matlist r(table)
		matrix wpop_rr_`outcome' = r(table)
		
		* Predictive margins
		margins rural, vce(unconditional)
		matlist r(table)
		matrix wpop_pred_`outcome' = r(table)
										
		* dydx margins
		margins, dydx(rural) vce(unconditional)
		matlist r(table)
		matrix wpop_dydx_`outcome' = r(table)
		
		* Drop variables for loop
		drop sample sum_wpop_sample wpopnr_sample age_spline*
		}	

/*******************************************************************************
WPOP RR FOREST PLOTS
*******************************************************************************/

* 1. Generating the spreadsheet using putexcel ---------------------------------

	/*	This step creates an Excel file with the risk ratios and average marginal
		effects for the aggregate wpop regression output.
	*/

* Create the Excel doc
cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables"
putexcel set "wpop rr forestplot ${date}.xlsx", replace 

* Making Excel frame
putexcel A1 = "name"
putexcel A2 = "Ever tested"
putexcel A3 = "Awareness"
putexcel A4 = "Glucose-lowering med"
putexcel A5 = "BP-lowering med"
putexcel A6 = "Statin"
putexcel A7 = "Glycemic control"
putexcel A8 = "BP control"
putexcel A9 = "Cholesterol control"
putexcel A10 = "Combined AB control"
putexcel A11 = "Combined ABC control"
	
putexcel B1 = "est_rr"
putexcel C1 = "lci_rr"
putexcel D1 = "uci_rr"
putexcel E1 = "est_dydx"
putexcel F1 = "lci_dydx"
putexcel G1 = "uci_dydx"
putexcel H1 = "p_value"
putexcel I1 = "est_urban_pred"
putexcel J1 = "lci_urban_pred"
putexcel K1 = "uci_urban_pred"
putexcel L1 = "est_rural_pred"
putexcel M1 = "lci_rural_pred"
putexcel N1 = "uci_rural_pred"

* Exporting to Excel

local n = 1

foreach outcome in $dependent_vars {	
	
	local n = `n' + 1
	
	matlist wpop_rr_`outcome'
	matrix results = wpop_rr_`outcome'

	local b = results[1,2]
	local lci = results[5,2]
	local uci = results[6,2]
	local p = results[4,2]

	putexcel B`n' = `b'
	putexcel C`n' = `lci'
	putexcel D`n' = `uci'
	putexcel H`n' = `p'
	
	matlist wpop_dydx_`outcome'
	matrix results = wpop_dydx_`outcome'

	local b = results[1,2]*100
	local lci = results[5,2]*100
	local uci = results[6,2]*100

	putexcel E`n' = `b'
	putexcel F`n' = `lci'
	putexcel G`n' = `uci'
	
	matlist wpop_pred_`outcome'
	matrix results = wpop_pred_`outcome'
	
	putexcel I`n' = results[1,1]*100
	putexcel J`n' = results[5,1]*100
	putexcel K`n' = results[6,1]*100
	putexcel L`n' = results[1,2]*100
	putexcel M`n' = results[5,2]*100
	putexcel N`n' = results[6,2]*100

}

* 2. Generating wpop rr forestplots ---------------------------------------------

frame create wpop_forestplot
frame change wpop_forestplot
	
import excel "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Putexcel tables/wpop rr forestplot $date.xlsx", sheet("Sheet1") firstrow cellrange(A1:N11) clear

insobs 1, before(1)
insobs 2, after(3)
insobs 2, after(8)

gen id = _n

replace name = "{bf:Diagnosis}" if id == 1
replace name = " " if id == 4
replace name = "{bf:Treatment}" if id == 5
replace name = " " if id == 9
replace name = "{bf:Control}" if id == 10

* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(id,1,5,10)
replace _USE = 6 if missing(name)

* Transforming for eform display in forestplot
gen est_rr_exp = log(est_rr)
gen lci_rr_exp = log(lci_rr)
gen uci_rr_exp = log(uci_rr)

* Making string variables	
gen pvalue_s = string(p_value,"%4.3f")
replace pvalue_s = "<0.001" if p_value <0.001
replace pvalue_s = "" if p_value == .

gen est_rr_s = string(est_rr,"%9.2f")
gen lci_rr_s = string(lci_rr,"%9.2f")
gen uci_rr_s = string(uci_rr,"%9.2f")
gen output_rr_s = est_rr_s + " (" + lci_rr_s + " to " + uci_rr_s + ")"
replace output_rr_s = "" if est_rr == .

gen est_dydx_s = string(est_dydx,"%9.1f")
gen lci_dydx_s = string(lci_dydx,"%9.1f")
gen uci_dydx_s = string(uci_dydx,"%9.1f")
gen output_dydx_s = est_dydx_s + " (" + lci_dydx_s + " to " + uci_dydx_s + ")"
replace output_dydx_s = "" if est_dydx == .

gen est_urban_pred_s = string(est_urban_pred,"%9.1f")
gen lci_urban_pred_s = string(lci_urban_pred,"%9.1f")
gen uci_urban_pred_s = string(uci_urban_pred,"%9.1f")
gen output_urban_pred_s = est_urban_pred_s + " (" + lci_urban_pred_s + " to " + uci_urban_pred_s + ")"
replace output_urban_pred_s = "" if est_urban_pred == .

gen est_rural_pred_s = string(est_rural_pred,"%9.1f")
gen lci_rural_pred_s = string(lci_rural_pred,"%9.1f")
gen uci_rural_pred_s = string(uci_rural_pred,"%9.1f")
gen output_rural_pred_s = est_rural_pred_s + " (" + lci_rural_pred_s + " to " + uci_rural_pred_s + ")"
replace output_rural_pred_s = "" if est_rural_pred == .

* Realiigning
leftalign

* Adding bolded labels		
label var name `"`"{bf:Outcome}"'"'
label var output_rr_s `"`"{bf:Risk ratio}"'"'
label var output_dydx_s `"`"{bf:Average marginal}"' `"{bf:effect (%)}"'"'
label var pvalue_s `"`"{bf:P value}"'"'
label var output_urban_pred_s `"`"{bf:Urban adjusted}"' `"{bf:proportion (%)}"'"'
label var output_rural_pred_s `"`"{bf:Rural adjusted}"' `"{bf:proportion (%)}"'"'

format %-6s pvalue_s

/*	Note: This code uses the "forestplot" program built by David Fisher. He has a number
	of variable helpful posts on Statalist:
	
	https://www.statalist.org/forums/forum/general-stata-discussion/general/1471066-editing-headings-on-metan-forest-plots
*/

local line_size ".2rs"
local forest_font_size "2.1"
local arrow_font_size "2"	

forestplot est_rr_exp lci_rr_exp uci_rr_exp, ///
	labels(name) ///
	eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_rr_s output_dydx_s pvalue_s output_urban_pred_s output_rural_pred_s) /// this tells forestplot which columsn to display
	range(0.25 1.8) /// range of the plot
	xlabel(0.25 0.5 1 2, labsize(2)) /// plot labels xmtick(0.3(0.1)1.5) ///
	astext(85) ///
	spacing(1.5) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		nlineopts(lwidth(`line_size')) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.25rs)) ///
	plotregion(margin(l=0 r=0 b=0rs t=0) lcolor(none)) ///
	graphregion(margin(l=0 r=0 b=0rs t=0))	///
	xsize(4) /// 	
	ysize(3) ///
	xtitle("Risk ratio", size(`forest_font_size')  margin(l=0 r=60 b=0 t=1)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	name(wpop_forest_plot,replace) //

* Fixing formatting issues	----------------------------------------------------

* Size of text in forestplot
	gr_edit plotregion1.plot5.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot6.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot7.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot8.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot9.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	gr_edit plotregion1.plot10.style.editstyle label(textstyle(size(`forest_font_size'))) editcopy
	
* Top black line
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy // gray line at top

* Arrows and labels for "favors"
gr_edit .AddLine added_lines editor 40 17 50 17
		gr_edit .added_lines_new = 1
		gr_edit .added_lines_rec = 1
		gr_edit .added_lines[1].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[1].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[1].style.editstyle headpos(head) editcopy

		gr_edit .AddLine added_lines editor 34 17 24 17
		gr_edit .added_lines_new = 2
		gr_edit .added_lines_rec = 2
		gr_edit .added_lines[2].style.editstyle  linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) headstyle( symbol(circle) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) fillcolor(black) size( sztype(relative) val(1.52778) allow_pct(1)) angle(stdarrow) symangle(zero) backsymbol(none) backline( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) backcolor(black) backsize( sztype(relative) val(0) allow_pct(1)) backangle(stdarrow) backsymangle(zero)) headpos(neither) editcopy
		gr_edit .added_lines[2].style.editstyle headstyle(size(medium)) editcopy
		gr_edit .added_lines[2].style.editstyle headpos(head) editcopy

		gr_edit .AddTextBox added_text editor 15 39
		gr_edit .added_text_new = 1
		gr_edit .added_text_rec = 1
		gr_edit .added_text[1].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(right) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[1].style.editstyle size(`arrow_font_size') editcopy
		gr_edit .added_text[1].text = {}
		gr_edit .added_text[1].text.Arrpush `"{it:Greater in rural}"'

		gr_edit .AddTextBox added_text editor 15 22
		gr_edit .added_text_new = 2
		gr_edit .added_text_rec = 2
		gr_edit .added_text[2].style.editstyle  angle(default) size( sztype(relative) val(2) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
		gr_edit .added_text[2].style.editstyle size(`arrow_font_size`'') editcopy
		gr_edit .added_text[2].text = {}
		gr_edit .added_text[2].text.Arrpush `"{it:Lower in rural}"'


	cd "/Users/davidflood/Dropbox (University of Michigan)/HPACC/Rural-urban paper/Figures"
	graph save "wpop_forest_plot_${date}.gph", replace         
	graph export "wpop_forest_plot_${date}.pdf", as(pdf) name(wpop_forest_plot) replace
		
frame change default
frame drop wpop_forestplot



////////////////////////////////////////////////////////////////////////////////
//////////////////////// TOTAL COVERAGE CALCULATIONS: //////////////////////////
////////////////////////////////////////////////////////////////////////////////

/* These calculations estimate the total population ages 18-69 represented among 
surveys included in this analysis. */

list country population_2015_18_69 if country_id == 1

	/* 	This comes out to 2,674,314,475 people in the included countries.
		Note that Zanzibar is included into Tanzania for these calculations.
	*/	

/* Total population 18-69 in LMICs 

	You can find this by going into the IHME spreadhseet and drilling down to 
	World Bank Income groups, then summing together ages 18-69 among LMICs.
	
	3,903,074,649
*/

di 2674314475/3903074649 // 68.52%
